This dataset originates from a nutrigenomic study in mice (Martin et al., 2007), designed to investigate how dietary fatty acid composition influences liver lipid profiles and hepatic gene expression.
Experimental Design
Forty mice were cross-classified according to two experimental factors, each with multiple levels:
Genotype (2 levels):
Wild-type (WT)
PPARα knockout (PPARα -/-)
Diet (5 levels):
REF: Reference diet (50/50 corn and colza oils)
COC: Saturated fatty acid-rich diet (hydrogenated coconut oil)
SUN: Omega-6-rich diet (sunflower oil)
LIN: Omega-3-rich diet (linseed oil)
FISH: Mixed diet (corn/colza/enriched fish oils in 43/43/14 ratio)
Each genotype-diet combination includes four biological replicates.
Variables Collected
Two sets of variables were measured for each mouse:
Expression levels of 120 selected hepatic genes, chosen from ~30,000
candidates for their relevance to nutritional regulation.
Measured using nylon macroarrays with radioactive labeling.
Relative concentrations (percentages) of 21 hepatic fatty
acids.
Quantified via gas chromatography.
data("nutrimouse")
genes <- nutrimouse$gene
lipids <- nutrimouse$lipid
metadata <- data.frame(genotype = nutrimouse$genotype, diet = nutrimouse$diet)
metadata$sample_name <- paste0(rownames(metadata), "_", metadata$genotype, "_", metadata$diet)
rownames(genes) <- metadata$sample_name
rownames(lipids) <- metadata$sample_name
# prepare data
blockPLS_data <- list(genes=genes, lipids=lipids)
genotype <- as.factor(metadata$genotype)
# run analysis
blockPLS_res <- block.plsda(X = blockPLS_data, Y = genotype, design = "full", all.outputs = T, ncomp = 10)
bp <- MulticoreParam(workers = 4)
blockPLS_perf <- perf(blockPLS_res, validation = 'Mfold', folds = 7, nrepeat = 1, auc = TRUE, BPPARAM = bp, progressBar = FALSE)
blockPLS_perf <- perf(
blockPLS_res,
validation = "Mfold",
folds = 7,
nrepeat = 100,
auc = TRUE,
BPPARAM = bp,
seed = 1
)
plot(blockPLS_perf)
blockPLS_res <- block.plsda(X = blockPLS_data, Y = genotype, design = "full", all.outputs = T, ncomp = 2)
# Cross-validation with DIABLO.cv()
blockPLS_cv <- DIABLO.cv(blockPLS_res,
validation = c("Mfold"),
k = 7,
repet = 100)
blockPLS_cv
##
## Cross validation
##
## Model: DIABLO
## 7-fold validation
## Validation repeated 100 times
## 2 components
##
## Classification criterion: Mahalanobis distance
##
## Mean (standard error) classification error rate (%): 0.575 (0.012)
# Permutation test with DIABLO.test()
blockPLS_permtest <- DIABLO.test(blockPLS_res,
nperm = 100,
progress = TRUE)
## | | | 0% | |= | 1% | |= | 2% | |== | 3% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 7% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 17% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 23% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 27% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 36% | |========================== | 37% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 43% | |=============================== | 44% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 63% | |============================================= | 64% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 83% | |=========================================================== | 84% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
blockPLS_permtest
##
## Permutation test based on cross-validation
##
## data: blockPLS_res
## DIABLO (2 components)
## 100 permutations
## CER = 0.00875, p-value = 0.009901
Plot explained variance per block and globally across latent variables. - Percentage of variance explained in each block by each LV: AVE_X - How strongly each block contributes to global LV structure: AVE[[“AVE_outer”]]
blockPLS_expl <- do.call("rbind",blockPLS_res$AVE$AVE_X[1:2])
# extract the cumulative global explained variance
ave_outer_cum <- blockPLS_res$AVE[["AVE_outer"]]
# convert to incremental (per component)
ave_outer_inc <- data.frame(comp1 = ave_outer_cum[1], comp2= diff(ave_outer_cum), row.names = "global")
blockPLS_expl <- rbind(blockPLS_expl, ave_outer_inc)
blockPLS_expl$block <- rownames(blockPLS_expl)
blockPLS_expl <- melt(blockPLS_expl)
blockPLS_expl$block <- factor(blockPLS_expl$block, levels = c("genes", "lipids", "global"))
ggplot(blockPLS_expl, aes(x=variable, y=value, fill=block)) +
geom_bar(stat="identity", position=position_dodge()) +
labs(x="component",
y="% of explained variance") +
theme_light()
plotIndiv(blockPLS_res, block = "weighted.average")
# ou:
blockPLS_variates.weighted <- blockPLS_res$variates[c("genes", "lipids")]
for(omic in c("genes", "lipids")){
for(comp in c("comp1", "comp2")){
blockPLS_variates.weighted[[omic]][,comp] <- blockPLS_variates.weighted[[omic]][,comp] * blockPLS_res$weights[omic, comp]
}
}
blockPLS_scores.weighted <- abind(blockPLS_variates.weighted[c("genes", "lipids")], along = 3)
blockPLS_scores.weighted <- apply(blockPLS_scores.weighted, c(1,2), mean)
blockPLS_scores.weighted <- data.frame(metadata, blockPLS_scores.weighted)
ggplot(blockPLS_scores.weighted, aes(x=comp1, y=comp2, col=diet, shape = genotype)) +
geom_point(size=4) +
theme_light()
plotVar(blockPLS_res)
# ou
blockPLS_loadings_genes <- blockPLS_res$loadings$genes
blockPLS_loadings_lipids <- blockPLS_res$loadings$lipids
blockPLS_loadings <- rbind.data.frame(blockPLS_loadings_genes, blockPLS_loadings_lipids)
blockPLS_loadings$omic <- c(rep("genes", dim(genes)[[2]]), rep("lipids", dim(lipids)[[2]]))
blockPLS_loadings$variable <- rownames(blockPLS_loadings)
ggplot(blockPLS_loadings, aes(x=comp1, y=comp2, col=omic, label=variable)) +
geom_point() +
geom_text_repel() +
theme_light()
COPLS_data <- list(genes=as.matrix(genes), lipids=as.matrix(lipids))
COPLS_data <- lapply(COPLS_data, scale)
genotype <- metadata$genotype
dummy_genotype <- as.matrix(data.frame(wt = ifelse(genotype == "wt", 1, 0),ppar = ifelse(genotype == "ppar", 1, 0)))
COPLS_res <- ConsensusOPLS(
data = COPLS_data,
Y = genotype,
maxPcomp = 1,
maxOcomp = 1,
modelType = "da",
cvType = "nfold",
nfold = 5,
nperm = 100,
verbose = T,
kernelParams = list(type='p', params = c(order=1.0))
)
COPLS_res$permStats.COPLS_res$optimal$modelCV and
COPLS_res$optimal$modelCV$cvplotQ2(COPLS_res)
plotDQ2(COPLS_res)
plotR2(COPLS_res)
#or
Q2perm <- data.frame(Q2perm = COPLS_res@permStats$Q2Y)
ggplot(data = Q2perm, aes(x = Q2perm)) +
geom_histogram(color="grey", fill="grey") +
geom_vline(aes(xintercept=COPLS_res@Q2["po1"]),color="blue", linetype="dashed", size=1) +
geom_text(x=COPLS_res@Q2["po1"]-0.15, y=10, label=round(COPLS_res@Q2["po1"],3), col="blue") +
theme_classic() +
ggtitle("Q2 Permutation test")
DQ2perm <- data.frame(DQ2perm = COPLS_res@permStats$DQ2Y)
ggplot(data = DQ2perm, aes(x = DQ2perm)) +
geom_histogram(color="grey", fill="grey") +
geom_vline(aes(xintercept=COPLS_res@DQ2["po1"]),color="blue", linetype="dashed", size=1) +
geom_text(x=COPLS_res@DQ2["po1"]-0.15, y=10, label=round(COPLS_res@DQ2["po1"],3), col="blue") +
theme_classic() +
ggtitle("DQ2 Permutation test")
R2Yperm <- data.frame(R2Yperm = COPLS_res@permStats$R2Y)
ggplot(data = R2Yperm, aes(x = R2Yperm)) +
geom_histogram(color="grey", fill="grey") +
geom_vline(aes(xintercept=COPLS_res@R2Y["po1"]),color="blue", linetype="dashed", size=1) +
geom_text(x=COPLS_res@R2Y["po1"]-0.1, y=10, label=round(COPLS_res@R2Y["po1"],3), col="blue") +
theme_classic() +
ggtitle("R2Y Permutation test")
-Plot block contributions as a bar plot to see which omics layer drives genotype discrimination.
plotContribution(COPLS_res)
# or
contributions <- COPLS_res@blockContribution
contributions <- melt(contributions)
colnames(contributions) <- c("dataset", "Dim", "value")
ggplot(contributions, aes(x=Dim, y=value, fill=dataset)) +
geom_bar(stat = "identity", position=position_dodge()) +
theme_light() +
labs(x = "global components", y = "specific weights of block on global components", fill = "omic",
title = "contributions")
plotScores(COPLS_res)
# or
scores <- data.frame(metadata, COPLS_res@scores)
ggplot(scores, aes(x=p_1, y=o_1, col=diet, shape = genotype)) +
geom_point(size=4) +
labs(x="Predictive",
y="Orthogonal",
title = "scores plots on predictive vs orthogonal latent variables") +
theme_light()
plotLoadings(COPLS_res)
# or
loadings <- rbind.data.frame(COPLS_res@loadings$genes, COPLS_res@loadings$lipids)
loadings$dataset <- c(rep("genes", nrow(COPLS_res@loadings$genes)), rep("lipids", nrow(COPLS_res@loadings$lipids)))
loadings$variable <- rownames(loadings)
ggplot(loadings, aes(x=p_1, y=o_1, col=dataset, label = variable)) +
geom_point(size=2) +
labs(x="Predictive",
y="Orthogonal",
title = "loadings plots on predictive vs orthogonal latent variables") +
geom_text_repel() +
theme_light()
plotVIP(COPLS_res)
# or
VIP <- data.frame(VIP = c(COPLS_res@VIP$genes$p, COPLS_res@VIP$lipids$p), variable = c(rownames(COPLS_res@VIP$genes), rownames(COPLS_res@VIP$lipids)))
loadings_VIP <- merge(loadings, VIP, by="variable")
loadings_VIP$label <- ifelse(loadings_VIP$VIP > 1, loadings_VIP$variable, NA)
ggplot(loadings_VIP, aes(x=p_1, y=VIP, col=dataset, label = label)) +
geom_point(size=2) +
labs(x="Predictive",
y="VIP",
title = "VIP vs loadings on predictive latent variable") +
geom_text_repel(size=3, max.overlaps = 50, segment.size=.1) +
theme_light()
Train a Consensus OPLS-DA model to discriminate between WT and PPAR samples using a training set and evaluate its performance on an independent test set.
test.observations <-c(sample(1:20, 5, replace = F),
sample(21:40, 5, replace = F))
train.data <- list(genes=as.matrix(genes[-test.observations,]),
lipids=as.matrix(lipids[-test.observations,]))
train.data <- lapply(train.data, scale)
test.data <- list(genes=as.matrix(genes[test.observations,]),
lipids=as.matrix(lipids[test.observations,]))
test.data <- lapply(test.data, scale)
train.genotype <- metadata$genotype[-test.observations]
train.COPLS_res <- ConsensusOPLS(
data = train.data,
Y = train.genotype,
maxPcomp = 1,
maxOcomp = 1,
modelType = "da",
cvType = "nfold",
nfold = 5,
nperm = 100,
verbose = T,
kernelParams = list(type='p', params = c(order=1.0))
)
plotQ2(train.COPLS_res)
plotDQ2(train.COPLS_res)
plotR2(train.COPLS_res)
plotContribution(train.COPLS_res)
plotScores(train.COPLS_res)
ConsensusOPLS::plotLoadings(train.COPLS_res) #because mixOmics also has a function called plotLoadings()
plotVIP(train.COPLS_res)
test.COPLS_res <- predict(object = train.COPLS_res,
newdata = test.data)
data.frame(test.COPLS_res$class,
Y.pred = test.COPLS_res$Y,
True.genotype=metadata$genotype[test.observations])
## class margin softmax.ppar softmax.wt Y.pred.ppar Y.pred.wt
## 13_wt_sun wt 1.4480737 0.000000e+00 1.0000000 -0.22403687 1.22403687
## 15_wt_sun wt 1.6280116 0.000000e+00 1.0000000 -0.31400582 1.31400582
## 7_wt_lin wt 0.8654443 2.505154e-06 0.9999975 0.06727783 0.93272217
## 17_wt_coc wt 1.3558755 0.000000e+00 1.0000000 -0.17793775 1.17793775
## 18_wt_fish wt 1.9643805 0.000000e+00 1.0000000 -0.48219027 1.48219027
## 39_ppar_lin ppar 1.7628255 1.000000e+00 0.0000000 1.38141275 -0.38141275
## 31_ppar_coc ppar 1.0809118 1.000000e+00 0.0000000 1.04045588 -0.04045588
## 25_ppar_sun ppar 2.1437467 1.000000e+00 0.0000000 1.57187333 -0.57187333
## 26_ppar_ref ppar 1.2415829 1.000000e+00 0.0000000 1.12079143 -0.12079143
## 36_ppar_coc ppar 1.0327190 1.000000e+00 0.0000000 1.01635948 -0.01635948
## True.genotype
## 13_wt_sun wt
## 15_wt_sun wt
## 7_wt_lin wt
## 17_wt_coc wt
## 18_wt_fish wt
## 39_ppar_lin ppar
## 31_ppar_coc ppar
## 25_ppar_sun ppar
## 26_ppar_ref ppar
## 36_ppar_coc ppar